# Goal: Assess accuracy of causal effects estimators, with and without survey weights
# Dependency: none

library(MatchIt)
library(weights)
library(Hmisc)

## SET SIMULATION PARAMETERS ##

## SET NUMBER OF OBSERVATIONS (N), NUMBER OF SIMULATIONS (J) ##

N_pop = 100000; J = 1000

PropB_pop <- 0.10 # proportion strata B in population

### Equal probability of selection:
### PropB_sample <- PropB_pop

### Unequal probability of selection 
PropB_sample <- 0.5 # proportion strata B in sample

### sample sizes

N_sample <- N_pop / 100

Na <- N_sample * (1 - PropB_sample)
Nb <- N_sample * (PropB_sample)

### assume stratified sampling
### probability of selection

Pa <- Na / (N_pop * (1 - PropB_pop))
Pb <- Nb / (N_pop * PropB_pop)

##------ SET PARAMETERS OF OUTCOME MODEL ------##

beta0 <- 1 # Treatment effect

### June 29 I changed betah from 0.5 to 1
### July 1 changed betah from 1 to 0.75

betah <- 0.75 # Treatment effect, dev strata B

### Average treatment effect:
avg.beta <- beta0 + betah * PropB_pop

alpha0 <- -0.25 ; alpha1 <- 0.60 ; alpha2 <- 0.30 # impact of Sb

##------ SET PARAMETERS OF PROPENSITY SCORE MODEL ------##

### June 29, changed gamma2 from 0.5 to 2
### July 1 changed gamma2 from 2 to 1

gamma0 <- -1 ; gamma1 <- 2  ; gamma2 <- 1

#--------------------------------------------------#
#-------------- Generate population data ----------#
#--------------------------------------------------#

### GENERATE CONFOUNDERS ###

set.seed(1)

X <- runif(N_pop, min=-1, max=1)

Sb <- c(rep(0, N_pop * (1 - PropB_pop)), rep(1, N_pop * PropB_pop))

#### COMPUTE TRUE PROPENSITY SCORE ####

invlogit <- function(x) {
  
  invlogit <- 1/(1 + exp(-x))
  
  invlogit
}

LP <- gamma0 + gamma1 * X + gamma2 * Sb

PS <- invlogit(LP) 

#### GENERATE TREATMENT ASSIGNMENTS BASED ON TRUE PROPENSITY SCORE ####

set.seed(2)

T <- rbinom(N_pop, 1, PS)

#### COMPUTE OUTCOME CONDITIONAL ON TREATMENT ASSIGNMENT AND CONFOUNDER ####

set.seed(3)

Y_error <- rnorm(N_pop, 0, 1)

Y <- alpha0 + alpha1 * X + alpha2 * Sb + (beta0 + betah * Sb) * T + Y_error

popdata <- data.frame(X, Sb, T, Y, PS, LP)

#### TRUE AVERAGE TREATMENT EFFECT ON THE TREATED

PropB_treated_pop <- prop.table(table(popdata$T,popdata$Sb), 1)[2,2]

avg.beta.treated <- beta0 + betah * PropB_treated_pop

#### SAVE TRUE AVERAGE TREATMENT EFFECTS

true.effect <- list("ATE" = avg.beta, "ATT" = avg.beta.treated)

#--------------------------------------------------#
#------- Generate data for simulation study -------#
#--------------------------------------------------#

data.sims<-NULL

set.seed(4)

for (j in 1:J) { # ITERATE OVER SIMULATIONS

#### STORE DATA USED IN SIMULATION STUDY ####

data.A.pop <- subset(popdata, Sb == 0)
data.A.samp <- data.A.pop[sample(1:nrow(data.A.pop), Na, replace=FALSE),]

data.B.pop <- subset(popdata, Sb == 1)
data.B.samp <- data.B.pop[sample(1:nrow(data.B.pop), Nb, replace=FALSE),]

data.sims[[j]]<- rbind(data.A.samp, data.B.samp)

data.sims[[j]]$W <- (1 - data.sims[[j]]$Sb) * Pb/Pa + data.sims[[j]]$Sb

}

##------ SET PARAMETERS OF MATCHING PROCEDURE ------#  

caliper.size <- 0 # If 0, no caliper matching
n.subclass <- 8
discard.string <-"control"

vATE.naive <- NULL
vATE.wnaive <- NULL
vATT.weight <- NULL
vATT.wweight <- NULL
vATT.weight.reg <- NULL
vATT.wweight.reg <- NULL
vATT.nn <- NULL
vATT.wnn <- NULL
vATT.nn.reg <- NULL
vATT.wnn.reg <- NULL
vATT.s <- NULL
vATT.s.reg <- NULL
vATT.ws <- NULL
vATT.ws.reg <- NULL

for (j in 1:J) { # ITERATE OVER SIMULATIONS
  
print(paste("Simulation Ner",j))
  
#-----------------------------#
#------- Naive ATE  ----------#
#-----------------------------#
  
# Without weights:
  
ATE.naive <- wtd.t.test(data.sims[[j]]$Y[data.sims[[j]]$T == 1], data.sims[[j]]$Y[data.sims[[j]]$T == 0], weight = rep(1, sum(data.sims[[j]]$T)), weighty = rep(1, sum(1 - data.sims[[j]]$T)), alternative="two.tailed", samedata = FALSE)$additional[c(1,4)]

# With weights:
  
ATE.wnaive <- wtd.t.test(data.sims[[j]]$Y[data.sims[[j]]$T == 1], data.sims[[j]]$Y[data.sims[[j]]$T == 0], weight = data.sims[[j]]$W[data.sims[[j]]$T == 1], weighty = data.sims[[j]]$W[data.sims[[j]]$T == 0], alternative="two.tailed", samedata = FALSE)$additional[c(1,4)]

# Store naive estimates of treatment effects

vATE.naive <- rbind(vATE.naive, ATE.naive)
vATE.wnaive <- rbind(vATE.wnaive, ATE.wnaive)
  
#------------------------------------------#
#------- Propensity Score Model  ----------#
#------------------------------------------#
  
# Assume Sb not observed, ignore survey weights
  
z.out1.ps <- glm(T ~ X,data = data.sims[[j]], family=binomial(link="logit"))
  
PS.mle1 <-z.out1.ps$fitted.values
LP.mle1 <- log(PS.mle1/(1-PS.mle1))
  
# Assume Sb not observed, control for survey weights instead (function of Sb)
  
z.out2.ps <- glm(T ~ X + W,data = data.sims[[j]], family=binomial(link="logit"))
  
PS.mle2 <-z.out2.ps$fitted.values
LP.mle2 <- log(PS.mle2/(1-PS.mle2))

#--------------------------------------------#
#------- Propensity Score Weighting----------#
#--------------------------------------------#
  
# Propensity score weights
  
data.sims[[j]][data.sims[[j]]$T == 1,"PS.weight1"] <- 1
data.sims[[j]][data.sims[[j]]$T == 0,"PS.weight1"] <- PS.mle1[data.sims[[j]]$T == 0]/ (1 - PS.mle1[data.sims[[j]]$T == 0])
  
data.sims[[j]][data.sims[[j]]$T == 1,"PS.weight2"] <- 1
data.sims[[j]][data.sims[[j]]$T == 0,"PS.weight2"] <- PS.mle2[data.sims[[j]]$T == 0]/ (1 - PS.mle2[data.sims[[j]]$T == 0])

# Treatment effects, without survey weights

 ## Without regression adjustment
  
ATT.weight <- wtd.t.test(data.sims[[j]]$Y[data.sims[[j]]$T == 1], data.sims[[j]]$Y[data.sims[[j]]$T == 0], weight = data.sims[[j]]$PS.weight1[data.sims[[j]]$T == 1], weighty = data.sims[[j]]$PS.weight1[data.sims[[j]]$T == 0], alternative="two.tailed", samedata = FALSE)$additional[c(1,4)]

 ## With regression adjustment
  
ATT.weight.reg <- summary(lm(Y ~ T + X, data = data.sims[[j]], weights = PS.weight1))$coefficients[2,c(1,2)]

 ## Store results of ps weighting w/o survey weights
  
vATT.weight <- rbind(vATT.weight, ATT.weight)
vATT.weight.reg <- rbind(vATT.weight.reg, ATT.weight.reg)
  
# Treatment effects, with survey weights
  
data.sims[[j]][,"adj.PS.weight"] <- data.sims[[j]]$PS.weight2 * data.sims[[j]]$W
  
 ## Without regression adjustment

ATT.wweight <- wtd.t.test(data.sims[[j]]$Y[data.sims[[j]]$T == 1], data.sims[[j]]$Y[data.sims[[j]]$T == 0], weight = data.sims[[j]]$adj.PS.weight[data.sims[[j]]$T == 1], weighty = data.sims[[j]]$adj.PS.weight[data.sims[[j]]$T == 0], alternative="two.tailed", samedata = FALSE)$additional[c(1,4)]

 ## With regression adjustment

ATT.wweight.reg <- summary(lm(Y ~ T + X, data = data.sims[[j]], weights = adj.PS.weight))$coefficients[2,c(1,2)]

 ## Store results of ps weighting w/ survey weights
  
vATT.wweight <- rbind(vATT.wweight, ATT.wweight)
vATT.wweight.reg <- rbind(vATT.wweight.reg, ATT.wweight.reg)
  
#------------------------------------------#
#------- Nearest Neighbor Matching  -------#
#------------------------------------------#

# Matching
  
 ## Without survey weights
  
set.seed(1000)
  
m.out1<-matchit(T ~ X,distance="logit",data=data.sims[[j]],method="nearest",caliper=caliper.size,m.order="largest",discard=discard.string, ratio = 2, replace = TRUE)
  
matched.data1 <- match.data(m.out1)
  
 ## With survey weights 
  
set.seed(1000)
  
m.out2<-matchit(T ~ X + W,distance="logit",data=data.sims[[j]],method="nearest",caliper=caliper.size,m.order="largest",discard=discard.string, ratio = 2, replace = TRUE)
  
matched.data2 <- rbind(data.sims[[j]][rownames(m.out2$match.matrix),],
                        data.sims[[j]][m.out2$match.matrix[,1],],
                        data.sims[[j]][m.out2$match.matrix[,2],])
matched.data2$W.matched.treated <- rep(data.sims[[j]][rownames(m.out2$match.matrix),]$W,3)
matched.data2$weights <- match.data(m.out2)[rownames(matched.data2),]$weights
  
# Treatment effects

 ## Without survey weights

  ### Without regression adjustment
  
ATT.nn <- wtd.t.test(matched.data1$Y[matched.data1$T == 1], matched.data1$Y[matched.data1$T == 0], weight = matched.data1$weights[matched.data1$T == 1], weighty = matched.data1$weights[matched.data1$T == 0], alternative="two.tailed", samedata = FALSE)$additional[c(1,4)]

  ### With regression adjustment

ATT.nn.reg <- summary(lm(Y ~ T + X, data = matched.data1, weights = weights))$coefficients[2,c(1,2)]

  ### Store results of nearest neighbor matching w/o survey weights
  
vATT.nn <- rbind(vATT.nn, ATT.nn)
vATT.nn.reg <- rbind(vATT.nn.reg, ATT.nn.reg)
  
 ## With survey weights
  
matched.data2$W2 <- matched.data2$W * matched.data2$weights

  ### Without regression adjustment
  
ATT.wnn <- wtd.t.test(matched.data2$Y[matched.data2$T == 1], matched.data2$Y[matched.data2$T == 0], weight = matched.data2$W2[matched.data2$T == 1], weighty = matched.data2$W2[matched.data2$T == 0], alternative="two.tailed", samedata = FALSE)$additional[c(1,4)]

  ### With regression adjustment
  
ATT.wnn.reg <- summary(lm(Y ~ T + X, data = matched.data2, weights = W2))$coefficients[2,c(1,2)]

  ### Store results of nearest neighbor matching w/ survey weights

vATT.wnn <- rbind(vATT.wnn, ATT.wnn)
vATT.wnn.reg <- rbind(vATT.wnn.reg, ATT.wnn.reg)
  
#-------------------------------------------#
#------- Subclassification Matching  -------#
#-------------------------------------------#
  
# Matching
  
 ## Without survey weights
  
m.out1 <- matchit(T ~ X,distance="logit",data=data.sims[[j]],method="subclass",discard=discard.string, subclass = n.subclass, sub.by = "all")
  
matched.data1 <- match.data(m.out1)

 ## With survey weights
  
m.out2 <- matchit(T ~ X + W,distance="logit",data=data.sims[[j]],method="subclass",discard=discard.string, subclass = n.subclass, sub.by = "all")
  
matched.data2 <- match.data(m.out2)
  
# Subclass weights
  
 ## Without survey weights
  
subclass.weights1 <- aggregate(T ~ subclass, data = subset(matched.data1), FUN = sum)$T / sum(matched.data1$T)

 ## With survey weights

subclass.weights2 <- aggregate((T * W) ~ subclass, data = subset(matched.data2), FUN = sum)[,"(T * W)"]/sum(matched.data2$T * matched.data2$W)
  
# Treatment effects
  
 ## Without survey weights
  
  ### Without regression adjustment
  
ATT.s <- wtd.t.test(matched.data1$Y[matched.data1$T == 1], matched.data1$Y[matched.data1$T == 0], weight = matched.data1$weights[matched.data1$T == 1], weighty = matched.data1$weights[matched.data1$T == 0], alternative="two.tailed", samedata = FALSE)$additional[c(1,4)]
  
  ### With regression adjustment
    
ATT.s.reg.list <- list()
  
for (s in 1:n.subclass) {
ATT.s.reg.list[[s]] <- summary(lm(Y ~ T + X, data = matched.data1[matched.data1$subclass == s,]))$coefficients[2,c(1,2)]
}
  
ATT.s.reg <- NULL
ATT.s.reg[1] <- weighted.mean(matrix(unlist(ATT.s.reg.list), ncol = 2, byrow = TRUE)[,1], w = subclass.weights1)
ATT.s.reg[2] <- sqrt(weighted.mean((matrix(unlist(ATT.s.reg.list), ncol = 2, byrow = TRUE)[,2])^2, w = subclass.weights1))

  ### Store results of subclassif matching w/o survey weights
  
vATT.s <- rbind(vATT.s, ATT.s)
vATT.s.reg <- rbind(vATT.s.reg, ATT.s.reg)
  
 ## With survey weights

matched.data2$W2 <- matched.data2$W * matched.data2$weights

  ### Without regression adjustment

ATT.ws <- wtd.t.test(matched.data2$Y[matched.data2$T == 1], matched.data2$Y[matched.data2$T == 0], weight = matched.data2$W2[matched.data2$T == 1], weighty = matched.data2$W2[matched.data2$T == 0], alternative="two.tailed", samedata = FALSE)$additional[c(1,4)]

  ### With regression adjustment

ATT.ws.reg.list <- list()

for (s in 1:n.subclass) {
ATT.ws.reg.list[[s]] <- summary(lm(Y ~ T + X, data = matched.data2[matched.data2$subclass == s,], weights = W2))$coefficients[2,c(1,2)]
}

ATT.ws.reg <- NULL
ATT.ws.reg[1] <- weighted.mean(matrix(unlist(ATT.ws.reg.list), ncol = 2, byrow = TRUE)[,1], w = subclass.weights2)
ATT.ws.reg[2] <- sqrt(weighted.mean((matrix(unlist(ATT.ws.reg.list), ncol = 2, byrow = TRUE)[,2])^2, w = subclass.weights2))

  ### Store results of subclassif matching w/ survey weights

vATT.ws <- rbind(vATT.ws, ATT.ws)
vATT.ws.reg <- rbind(vATT.ws.reg, ATT.ws.reg)

}

#--------------------------------------------------#
#------- Store results of entire procedure  -------#
#--------------------------------------------------#

sim.results <- list("true effect" = true.effect, "vATE.naive" = vATE.naive, "vATE.wnaive" = vATE.wnaive, "vATT.weight" = vATT.weight, "vATT.wweight" = vATT.wweight, "vATT.weight.reg" = vATT.weight.reg, "vATT.wweight.reg" = vATT.wweight.reg, "vATT.nn" = vATT.nn, "vATT.wnn" = vATT.wnn, "vATT.nn.reg" = vATT.nn.reg, "vATT.wnn.reg" = vATT.wnn.reg,  "vATT.s" = vATT.s, "vATT.ws" = vATT.ws, "vATT.s.reg" = vATT.s.reg, "vATT.ws.reg" = vATT.ws.reg)
save(sim.results, file = "sim_results.Rdata")
